<?php
namespace app\index\controller;

use app\index\model\RegionModel;
use think\Controller;

/**
 * Address parsing controller (modified specifically to support the "榴莲运动" project).
 * area_parse_detail.php
 *
 * All parsing here is heuristic (keyword and length statistics), not natural
 * language processing, so results are best-effort.
 *
 * @author yuexch
 * @createtime 2019/12/26 17:50
 */

class Region extends Controller
{
    /**
     * Tries to extract a personal name from free text using a fixed surname list.
     *
     * The first substring that consists of a listed surname followed by one or
     * two CJK characters is treated as the name; every occurrence of it is then
     * removed from the text. This method is not called from within this class.
     *
     * @param string $address Free text that may contain a name.
     * @return array ['address' => remaining text, 'name' => matched name or '']
     */
    private function getRealName($address = '')
    {
        $name = '';
        if ($address) {
            // Surname list (single-character and compound surnames, '|' separated).
            $surnames = '赵|钱|孙|李|周|吴|郑|王|冯|陈|楮|卫|蒋|沈|韩|杨|朱|秦|尤|许|何|吕|施|张|孔|曹|严|华|金|魏|陶|姜|戚|谢|邹|喻|柏|水|窦|章|云|苏|潘|葛|奚|范|彭|郎|鲁|韦|昌|马|苗|凤|花|方|俞|任|袁|柳|酆|鲍|史|唐|费|廉|岑|薛|雷|贺|倪|汤|滕|殷|罗|毕|郝|邬|安|常|乐|于|时|傅|皮|卞|齐|康|伍|余|元|卜|顾|孟|平|黄|和|穆|萧|尹|姚|邵|湛|汪|祁|毛|禹|狄|米|贝|明|臧|计|伏|成|戴|谈|宋|茅|庞|熊|纪|舒|屈|项|祝|董|梁|杜|阮|蓝|闽|席|季|麻|强|贾|路|娄|危|江|童|颜|郭|梅|盛|林|刁|锺|徐|丘|骆|高|夏|蔡|田|樊|胡|凌|霍|虞|万|支|柯|昝|管|卢|莫|经|房|裘|缪|干|解|应|宗|丁|宣|贲|邓|郁|单|杭|洪|包|诸|左|石|崔|吉|钮|龚|程|嵇|邢|滑|裴|陆|荣|翁|荀|羊|於|惠|甄|麹|家|封|芮|羿|储|靳|汲|邴|糜|松|井|段|富|巫|乌|焦|巴|弓|牧|隗|山|谷|车|侯|宓|蓬|全|郗|班|仰|秋|仲|伊|宫|宁|仇|栾|暴|甘|斜|厉|戎|祖|武|符|刘|景|詹|束|龙|叶|幸|司|韶|郜|黎|蓟|薄|印|宿|白|怀|蒲|邰|从|鄂|索|咸|籍|赖|卓|蔺|屠|蒙|池|乔|阴|郁|胥|能|苍|双|闻|莘|党|翟|谭|贡|劳|逄|姬|申|扶|堵|冉|宰|郦|雍|郤|璩|桑|桂|濮|牛|寿|通|边|扈|燕|冀|郏|浦|尚|农|温|别|庄|晏|柴|瞿|阎|充|慕|连|茹|习|宦|艾|鱼|容|向|古|易|慎|戈|廖|庾|终|暨|居|衡|步|都|耿|满|弘|匡|国|文|寇|广|禄|阙|东|欧|殳|沃|利|蔚|越|夔|隆|师|巩|厍|聂|晁|勾|敖|融|冷|訾|辛|阚|那|简|饶|空|曾|毋|沙|乜|养|鞠|须|丰|巢|关|蒯|相|查|后|荆|红|游|竺|权|逑|盖|益|桓|公|万俟|司马|上官|欧阳|夏侯|诸葛|闻人|东方|赫连|皇甫|尉迟|公羊|澹台|公冶|宗政|濮阳|淳于|单于|太叔|申屠|公孙|仲孙|轩辕|令狐|锺离|宇文|长孙|慕容|鲜于|闾丘|司徒|司空|丌官|司寇|仉|督|子车|颛孙|端木|巫马|公西|漆雕|乐正|壤驷|公良|拓拔|夹谷|宰父|谷梁|晋|楚|阎|法|汝|鄢|涂|钦|段干|百里|东郭|南门|呼延|归|海|羊舌|微生|岳|帅|缑|亢|况|后|有|琴|梁丘|左丘|东门|西门|商|牟|佘|佴|伯|赏|南宫|墨|哈|谯|笪|年|爱|阳|佟|第五|言|福';

            // PCRE alternation takes the first alternative that matches, so the
            // compound surnames must be tried before the single-character ones;
            // otherwise "司马相如" is matched as 司 + 马相 and the name is cut short.
            $alternatives = array_unique(explode('|', $surnames));
            usort($alternatives, static function ($a, $b) {
                return mb_strlen($b) - mb_strlen($a);
            });
            $re = '/(' . implode('|', $alternatives) . ')[\x{4e00}-\x{9fa5}]{1,2}/u';

            if (preg_match($re, $address, $matches) === 1 && $matches[0]) {
                $name = $matches[0];
                $address = trim(str_replace($name, '', $address));
            }
        }
        return ['address' => $address, 'name' => $name];
    }

    /**
     * Smart parsing of a pasted shipping-address blob.
     *
     * Extracts, in this order: ID-card number, phone number, postcode, then
     * treats the shortest space-separated token as the recipient name and the
     * rest as the address, which is passed on to detail_parse().
     *
     * @param string $address Text containing name/phone/postcode/address; words may be
     *                        separated by Chinese or ASCII punctuation (comma, full stop,
     *                        semicolon, colon) or whitespace.
     * @param int    $json    Truthy: return a JSON string. Falsy: return the array.
     * @return string|array   Keys that may be present: idno, mobile, postcode, name,
     *                        detail_origin, detail. Empty when $address is empty.
     */
    public static function smart_parse($address = '', $json = 1)
    {
        // Parse result
        $parse = [];
        if ($address) {
            // 1. Replace common label words and punctuation with a space so they
            //    do not interfere with tokenisation. Longer labels come first so
            //    that e.g. "收货地址" is consumed before "地址".
            $search = ['收货地址', '地址', '收货人', '收件人', '收货', '邮编', '电话', '身份证号码', '身份证号', '身份证', '：', ':', '；', ';', '，', ',', '。'];
            $address = str_replace($search, ' ', $address);

            // 2. Collapse every run of whitespace (space, \r, \n, \t) to one space.
            $address = preg_replace('/\s{1,}/', ' ', $address);

            // 3. Remove the dashes from mobile numbers written as 136-3333-6666
            //    (format produced by some phones), tolerating a leading "0" or "0-".
            //    The prefix is an optional group: a bare "0-" alternative would also
            //    delete "0-" from landlines (010-88888888) and unit numbers (10-2-301).
            $address = preg_replace('/(?:0-?)?(\d{3})-(\d{4})-(\d{4})/', '$1$2$3', $address);

            // 4. Extract a mainland-China ID-card number (18 digits, or 17 digits + X).
            preg_match('/\d{18}|\d{17}X/i', $address, $match);
            if ($match && $match[0]) {
                $parse['idno'] = strtoupper($match[0]);
                $address = str_replace($match[0], '', $address);
            }

            // 5. Extract a mobile number (up to 11 digits) or a landline of 7+ digits,
            //    optionally with an area code.
            preg_match('/\d{7,11}|\d{3,4}-\d{6,8}/', $address, $match);
            if ($match && $match[0]) {
                $parse['mobile'] = $match[0];
                $address = str_replace($match[0], '', $address);
            }

            // 6. Extract a 6-digit postcode.
            preg_match('/\d{6}/', $address, $match);
            if ($match && $match[0]) {
                $parse['postcode'] = $match[0];
                $address = str_replace($match[0], '', $address);
            }

            // Collapse the gaps left by the removals above and trim.
            $address = trim(preg_replace('/ {2,}/', ' ', $address));

            // Split on spaces: the shortest token is taken as the name, the rest as
            // the address. Length is measured in characters, not bytes, so that
            // mixed ASCII/CJK tokens compare sensibly. On ties the first token wins.
            $tokens = explode(' ', $address);
            if (count($tokens) > 1) {
                $nameIndex = 0;
                foreach ($tokens as $index => $token) {
                    if (mb_strlen($token) < mb_strlen($tokens[$nameIndex])) {
                        $nameIndex = $index;
                    }
                }
                $parse['name'] = $tokens[$nameIndex];
                // Drop only the chosen token; a plain str_replace would also strip the
                // same characters wherever they occur inside the address itself.
                unset($tokens[$nameIndex]);
                $address = trim(implode(' ', $tokens));
            }

            $parse['detail_origin'] = $address;
            // Province / city / district / street breakdown of the remaining text.
            $parse['detail'] = self::detail_parse($address, 0);
        }

        return $json ? json_encode($parse) : $parse;
    }

    /**
     * Splits an address (without name or phone) into province, city, district and street.
     *
     * Works from the characteristic suffixes of Chinese administrative names
     * (县/区/旗/市/盟/州) to guess a level-3 and a level-2 name, then resolves
     * them against the region table through RegionModel.
     *
     * Sample inputs from the original author:
     *   成都市高新区天府软件园B区科技大楼
     *   双流县郑通路社保局区52050号
     *   岳市岳阳楼区南湖求索路碧灏花园A座1101
     *   四川省南充市阆中市公园路25号
     *   四川省阆中市公园路25号
     *   四川省 凉山州美姑县xxx小区18号院
     *   重庆攀枝花市东区机场路3中学校
     *   渝北区渝北中学51200街道地址
     *   天津天津市红桥区水木天成1区临湾路9-3-1101
     *
     * @param string $detail Address text, without name or phone number.
     * @param int    $json   Truthy: return a JSON string. Falsy: return the array.
     * @return string|array  Array keyed 1..3 (province, city, district) and optionally 4
     *                       (remaining street text), each ['region_id' => ..., 'region_name' => ...].
     *                       Empty when nothing could be resolved.
     */
    public static function detail_parse($detail, $json = 1)
    {
        $detail_origin = $detail; // keep the caller's text for the street remainder
        $detail = str_replace(' ', '', $detail);
        // Normalise autonomous regions/prefectures so their "区"/"州" suffixes are
        // not mistaken for a district / do not need special-casing below.
        $detail = str_replace('自治区', '省', $detail);
        $detail = str_replace('自治州', '州', $detail);

        $result = [];

        // 1. Level-3 guess (county / district / banner / county-level city).
        $deep3_area_name = self::guessLevel3Name($detail);
        // 2. Level-2 guess (city / league / prefecture). Users may write
        //    "四川省阆中市", so this is only used together with the level-3 guess.
        $deep2_area_name = self::guessLevel2Name($detail);

        // 3. Resolve the guesses against the region table.
        if ($deep3_area_name != '') {
            $model_area = new RegionModel();
            $deep3_area_list = $model_area->getAreaList([
                ['region_type', 'eq', 3],
                ['region_name', 'like', '%' . $deep3_area_name . '%'],
            ]);
            $match_count = $deep3_area_list ? count($deep3_area_list) : 0;

            if ($match_count > 1) {
                // Several level-3 candidates: narrow them down with the level-2 guess.
                if ($deep2_area_name) {
                    $area_info_2 = $model_area->getAreaInfo([
                        ['region_type', 'eq', 2],
                        ['region_name', 'like', '%' . $deep2_area_name . '%'],
                    ]);
                    if ($area_info_2) {
                        $area_info_3 = $model_area->getAreaInfo([
                            ['parent_id', 'eq', $area_info_2['region_id']],
                            ['region_name', 'like', '%' . $deep3_area_name . '%'],
                        ]);
                        $area_info_1 = $model_area->getAreaInfo([
                            ['region_type', 'eq', 1],
                            ['region_id', 'eq', $area_info_2['parent_id']],
                        ]);
                        if ($area_info_1) {
                            $result = self::buildResult($area_info_1, $area_info_2, $area_info_3);
                        }
                    }
                }
            } elseif ($match_count == 1) {
                // Unique level-3 match: walk up through its parents.
                $area_info_2 = $model_area->getAreaInfo([
                    ['region_id', 'eq', $deep3_area_list[0]['parent_id']],
                    ['region_type', 'eq', 2],
                ]);
                if ($area_info_2) {
                    $area_info_1 = $model_area->getAreaInfo([
                        ['region_id', 'eq', $area_info_2['parent_id']],
                        ['region_type', 'eq', 1],
                    ]);
                    $result = self::buildResult($area_info_1, $area_info_2, $deep3_area_list[0]);
                }
            } elseif ($deep2_area_name == $deep3_area_name) {
                // Province + city only, no district, e.g.
                // 内蒙古自治区乌兰察布市公安局交警支队车管所
                $area_info_2 = $model_area->getAreaInfo([
                    ['region_type', 'eq', 2],
                    ['region_name', 'like', '%' . $deep2_area_name . '%'],
                ]);
                if ($area_info_2) {
                    $area_info_1 = $model_area->getAreaInfo([
                        ['region_id', 'eq', $area_info_2['parent_id']],
                        ['region_type', 'eq', 1],
                    ]);
                    if ($area_info_1) {
                        $result = self::buildResult(
                            $area_info_1,
                            $area_info_2,
                            ['region_id' => '', 'region_name' => '']
                        );
                    }
                }
            }
        }

        if ($result) {
            // Whatever is left after removing the resolved names and the bare
            // suffix characters is reported as the street part (key 4).
            // NOTE(review): delStr() is a project helper that is not defined in this
            // file; it is presumably a "remove substring" routine — confirm its contract.
            $addr = $detail_origin;
            foreach ($result as $level) {
                if ($level['region_name']) {
                    $addr = delStr($level['region_name'], $level['region_name'], $addr, 0);
                }
            }
            foreach (['自治县', '县', '区', '旗', '市', '盟', '州'] as $suffix) {
                $addr = delStr($suffix, $suffix, $addr, 0);
            }
            if ($addr) {
                $result[4]['region_id'] = '';
                $result[4]['region_name'] = $addr;
            }
        }

        // Final result
        return $json ? json_encode($result) : $result;
    }

    /**
     * Guesses the level-3 (county/district/banner/county-level city) name fragment.
     *
     * Later checks deliberately override earlier ones: banner, then district,
     * then county (when both 县 and 区 occur, the county is assumed to come first
     * in the text). Without any of those keywords the last 市 is taken as a
     * county-level city.
     *
     * @param string $detail Normalised address text (no spaces).
     * @return string Name fragment for a LIKE query, or '' when nothing was recognised.
     */
    private static function guessLevel3Name($detail)
    {
        $banner_pos = mb_strpos($detail, '旗');
        $district_pos = mb_strpos($detail, '区');
        $county_pos = mb_strpos($detail, '县');

        if ($banner_pos === false && $district_pos === false && $county_pos === false) {
            // Most likely a county-level city, e.g. 四川省南充市阆中市公园路25号:
            // use the LAST occurrence of 市.
            return self::nameEndingAt($detail, mb_strrpos($detail, '市'), 3);
        }

        $first_city_pos = mb_strpos($detail, '市');
        $name = '';

        if ($banner_pos !== false) {
            $name = self::nameEndingAt($detail, $banner_pos, 2);
        }

        if ($district_pos !== false) {
            if ($first_city_pos !== false && $first_city_pos < $district_pos) {
                // City precedes district, e.g. 攀枝花市东区…: everything between
                // the two is the district name. A 市 that appears after the district
                // (市政, 市场 …) must not be used: it would give a negative length.
                $name = mb_substr($detail, $first_city_pos + 1, $district_pos - $first_city_pos);
            } else {
                // Three characters is by far the most common length (高新区, 武侯区).
                $name = self::nameEndingAt($detail, $district_pos, 3);
            }
        }

        if ($county_pos !== false) {
            if ($first_city_pos !== false && $first_city_pos < $county_pos) {
                // e.g. 湖南省常德市澧县
                $name = mb_substr($detail, $first_city_pos + 1, $county_pos - $first_city_pos);
            } elseif (mb_strpos($detail, '自治县') !== false) {
                // e.g. 甘肃省东乡族自治县布楞沟村1号: take up to seven characters and
                // drop a leading level marker picked up from the preceding name.
                $name = self::nameEndingAt($detail, $county_pos, 7);
                if (in_array(mb_substr($name, 0, 1), ['省', '市', '州'], true)) {
                    $name = mb_substr($name, 1);
                }
            } else {
                $name = self::nameEndingAt($detail, $county_pos, 3);
            }
        }

        return $name;
    }

    /**
     * Guesses the level-2 (city/league/prefecture) name fragment.
     *
     * Uses the last occurrence of 市, 盟 and 州 in that order, later hits
     * overriding earlier ones. A 州 that is directly followed by 市 or 省 is part
     * of a city or province name (杭州市, 贵州省) and is ignored.
     *
     * @param string $detail Normalised address text (no spaces, 自治州 already reduced to 州).
     * @return string Name fragment for a LIKE query, or '' when nothing was recognised.
     */
    private static function guessLevel2Name($detail)
    {
        $name = '';

        foreach (['市', '盟'] as $keyword) {
            $pos = mb_strrpos($detail, $keyword);
            if ($pos !== false && $pos > 0) {
                $name = self::nameEndingAt($detail, $pos, 3);
            }
        }

        $pos = mb_strrpos($detail, '州');
        if ($pos !== false && $pos > 0) {
            $next_char = mb_substr($detail, $pos + 1, 1);
            if ($next_char !== '市' && $next_char !== '省') {
                $name = self::nameEndingAt($detail, $pos, 3);
            }
        }

        return $name;
    }

    /**
     * Returns up to $length characters of $text ending at (and including) offset $pos.
     *
     * The start offset is clamped at 0: a negative start would make mb_substr()
     * count from the end of the string and return unrelated text.
     *
     * @param string    $text   Text to slice.
     * @param int|false $pos    Character offset of the keyword, or false when absent.
     * @param int       $length Maximum number of characters to return.
     * @return string '' when the keyword is absent or has no characters before it.
     */
    private static function nameEndingAt($text, $pos, $length)
    {
        if ($pos === false || $pos < 1) {
            return '';
        }
        $start = max(0, $pos - $length + 1);

        return mb_substr($text, $start, $pos - $start + 1);
    }

    /**
     * Assembles the province/city/district part of the result.
     *
     * @param mixed $province Level-1 row, or an empty value when the lookup failed.
     * @param mixed $city     Level-2 row (must be present).
     * @param mixed $district Level-3 row, or an empty value when the lookup failed.
     * @return array Keys 1..3, each ['region_id' => ..., 'region_name' => ...];
     *               fields of a missing row are null.
     */
    private static function buildResult($province, $city, $district)
    {
        return [
            1 => [
                'region_id' => $city['parent_id'],
                'region_name' => $province ? $province['region_name'] : null,
            ],
            2 => [
                'region_id' => $city['region_id'],
                'region_name' => $city['region_name'],
            ],
            3 => [
                'region_id' => $district ? $district['region_id'] : null,
                'region_name' => $district ? $district['region_name'] : null,
            ],
        ];
    }
}